# remove(list = ls())
### loading packages
library(haven)
library(reactable)
library(sf)
library(data.table)
library(saeTrafo)
library(emdi)
library(tidyverse)In this script the SAE models FH and BHF will be calculated.
The Variable selection resulted that we will use this Formula, for further specification of the Models.
aestudio ~ p26_edad + ocu_military + ocu_manager + ocu_professional + ocu_technician + ocu_adminSupport + ocu_serviceSales + ocu_agriculture + ocu_unskilled + sex_male + read_knowing + urbrur_urban + health_insurance_yes + interior_plastered_yes + v04_revoq_Missing + car_yes + water_heater_yes + kitchen_yes
### auxillary data needed to calculate FH model
dat_census_aux <- readRDS("../data_raw/2024_census/processed/census_aux_data_aggregated.RDS")
#### load processed dummy coded census data
dat <- readRDS("../data_raw/2024_census/processed/dat_dummyCoding.RDS")
pop_table <- dat %>% group_by(ID_prov) %>% count()
#### load sample data
### 172 much better for FH
# sample <- readRDS("../data_raw/samples/transformed-2/sample_172.RDS")
sample <- readRDS("../data_raw/samples/sample_for_model_building_transformed.RDS")
sample %>% class()## [1] "data.table" "data.frame"
sample <- sample %>% as.data.frame()
# FH full nutzt aggregated/shares aus dat_census_aux
fh_formula <- Mean ~ mean_p26_edad + share_ocu_military + share_ocu_manager +
share_ocu_professional + share_ocu_technician + share_ocu_adminSupport +
share_ocu_serviceSales + share_ocu_agriculture + share_ocu_unskilled +
share_sex_male + share_read_knowing + share_urbrur_urban + share_health_insurance_yes +
share_interior_plastered_yes + share_v04_revoq_Missing + share_car_yes + share_water_heater_yes +
share_kitchen_yes
# BHF nutzt individuelle Dummyvariablen im Sample/Census
bhf_formula <- aestudio ~ p26_edad + ocu_military + ocu_manager +
ocu_professional + ocu_technician + ocu_adminSupport + ocu_serviceSales +
ocu_agriculture + ocu_unskilled + sex_male + read_knowing +
urbrur_urban + health_insurance_yes + interior_plastered_yes +
v04_revoq_Missing + car_yes + water_heater_yes + kitchen_yes
sample$aestudio %>% hist()We have a simple random sample, which makes calculating the sample variance much easier, and we dont need second order inclusion criterion, nor do we need to use bootsrap or jacknife, to calculate the direct estimate and the domain specific variance.
This is the HT estimator for the totls in a domain:\(\hat{Y}_d = \sum_{i \in s_d} \frac{y_i}{\pi_i}\)
This then is the resultig design-based variance: \[ \text{Var}_\pi(\hat{Y}_d) = \left(1 - \frac{n_d}{N_d}\right) \frac{N_d^2 S_d^2}{n_d} \]
Now we are not interest in the totals, but in the variance of the mean, which results in this formula: \[ \text{Var}_\pi(\bar{y}_d) = \frac{1}{N_d^2} \text{Var}_\pi(\hat{Y}_d) = \left(1 - \frac{n_d}{N_d}\right) \frac{S_d^2}{n_d} \]
# direct_estimates <- sample %>%
# group_by(ID_prov) %>%
# summarise(
# Mean = mean(aestudio, na.rm = TRUE),
# n_eff = sum(!is.na(aestudio)),
# Var_Mean = var(aestudio, na.rm = T)/n_eff, #### we still have t inlcude FPC
# .groups = "drop"
# )
sample <- sample %>% left_join(pop_table,by = "ID_prov")
direct_estimates <- sample %>%
group_by(ID_prov) %>%
reframe(
Mean = mean(aestudio, na.rm = TRUE),
n_eff = sum(!is.na(aestudio)),
n = mean(n,na.rm = T),
Var_Mean = (1 - (n_eff/n)) * var(aestudio, na.rm = T)/n_eff
)
### recreate direct object
direct_rec <- list(
"ind" = data.frame("Domain" = direct_estimates$ID_prov,
"Mean" = direct_estimates$Mean),
"MSE" = data.frame("Domain" = direct_estimates$ID_prov,
"Mean" = direct_estimates$Var_Mean)
)
class(direct_rec) <- c("direct", "emdi")
# direct_rec %>% class()
# direct %>% class()
# direct <- emdi::direct(y = "aestudio", smp_data = sample ,smp_domains = "ID_prov",var = T,na.rm = T,B = 500)
# saveRDS(direct,"../data_raw/simulation/processed/direct_500_bootstraps.RDS")
direct <- readRDS("../data_raw/simulation/processed/direct_500_bootstraps.RDS")
# tmp_dat <- merge(direct$MSE[,c("Domain","Mean")],direct_estimates[,c("ID_prov","Var_Mean")],by.x = "Domain", by.y = "ID_prov")
# plot(tmp_dat$Mean,tmp_dat$Var_Mean)
# direct$successful_bootstraps
# cor.test(tmp_dat$Mean,tmp_dat$Var_Mean)fh_model_null <- fh(
fixed = Mean ~ 1,
vardir = "Var_Mean",
combined_data = combined_data,
domains = "ID_prov",
method = "reml",
MSE = TRUE
)
fh_model_null$MSE$FH %>% mean()## [1] 0.7744406
### ml, becasue step funcion only works with ml
fh_model_full_step <- fh(
fixed = fh_formula,
vardir = "Var_Mean",
combined_data = combined_data,
domains = "ID_prov",
method = "ml",
MSE = TRUE
)## Warning in fh(fixed = fh_formula, vardir = "Var_Mean", combined_data =
## combined_data, : The estimate of the variance of the random effects falls at
## the interval limit. It is recommended to choose a larger interval for the
## estimation of the variance of the random effects (specify interval input
## argument).
fh_model_full_unoptimized <- fh(
fixed = fh_formula,
vardir = "Var_Mean",
combined_data = combined_data,
domains = "ID_prov",
method = "reml",
MSE = TRUE
)
fh_model_full_unoptimized$MSE$FH %>% mean()## [1] 0.3002415
## Start: AIC = 358.49
## Mean ~ mean_p26_edad + share_ocu_military + share_ocu_manager +
## share_ocu_professional + share_ocu_technician + share_ocu_adminSupport +
## share_ocu_serviceSales + share_ocu_agriculture + share_ocu_unskilled +
## share_sex_male + share_read_knowing + share_urbrur_urban +
## share_health_insurance_yes + share_interior_plastered_yes +
## share_v04_revoq_Missing + share_car_yes + share_water_heater_yes +
## share_kitchen_yes
## Warning in fh(fixed = Mean ~ share_ocu_military + share_ocu_manager +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_manager +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_manager + : The estimate of the variance of the random effects falls
## at the interval limit. It is recommended to choose a larger interval for the
## estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_manager + : The estimate of the variance of the random effects falls
## at the interval limit. It is recommended to choose a larger interval for the
## estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_manager + : The estimate of the variance of the random effects falls
## at the interval limit. It is recommended to choose a larger interval for the
## estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_manager + : The estimate of the variance of the random effects falls
## at the interval limit. It is recommended to choose a larger interval for the
## estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_manager + : The estimate of the variance of the random effects falls
## at the interval limit. It is recommended to choose a larger interval for the
## estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_manager + : The estimate of the variance of the random effects falls
## at the interval limit. It is recommended to choose a larger interval for the
## estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_manager + : The estimate of the variance of the random effects falls
## at the interval limit. It is recommended to choose a larger interval for the
## estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_manager + : The estimate of the variance of the random effects falls
## at the interval limit. It is recommended to choose a larger interval for the
## estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_manager + : The estimate of the variance of the random effects falls
## at the interval limit. It is recommended to choose a larger interval for the
## estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_manager + : The estimate of the variance of the random effects falls
## at the interval limit. It is recommended to choose a larger interval for the
## estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_manager + : The estimate of the variance of the random effects falls
## at the interval limit. It is recommended to choose a larger interval for the
## estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_manager + : The estimate of the variance of the random effects falls
## at the interval limit. It is recommended to choose a larger interval for the
## estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_manager + : The estimate of the variance of the random effects falls
## at the interval limit. It is recommended to choose a larger interval for the
## estimation of the variance of the random effects (specify interval input
## argument).
## df AIC
## - share_ocu_manager 1 356.51
## - share_interior_plastered_yes 1 356.53
## - share_ocu_unskilled 1 356.64
## - share_water_heater_yes 1 357.11
## - share_ocu_agriculture 1 357.49
## - share_ocu_military 1 357.53
## - share_urbrur_urban 1 358.19
## <none> 358.49
## - share_v04_revoq_Missing 1 358.68
## - share_ocu_serviceSales 1 358.86
## - share_sex_male 1 359.00
## - share_kitchen_yes 1 359.00
## - share_ocu_technician 1 359.26
## - share_ocu_adminSupport 1 359.58
## - share_car_yes 1 362.82
## - mean_p26_edad 1 364.29
## - share_health_insurance_yes 1 365.05
## - share_ocu_professional 1 365.80
## - share_read_knowing 1 376.66
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
##
## Step: AIC = 356.51
## Mean ~ mean_p26_edad + share_ocu_military + share_ocu_professional +
## share_ocu_technician + share_ocu_adminSupport + share_ocu_serviceSales +
## share_ocu_agriculture + share_ocu_unskilled + share_sex_male +
## share_read_knowing + share_urbrur_urban + share_health_insurance_yes +
## share_interior_plastered_yes + share_v04_revoq_Missing +
## share_car_yes + share_water_heater_yes + share_kitchen_yes
## Warning in fh(fixed = Mean ~ share_ocu_military + share_ocu_professional + :
## The estimate of the variance of the random effects falls at the interval limit.
## It is recommended to choose a larger interval for the estimation of the
## variance of the random effects (specify interval input argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_professional +
## share_ocu_technician + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## df AIC
## - share_interior_plastered_yes 1 354.55
## - share_ocu_unskilled 1 354.65
## - share_water_heater_yes 1 355.16
## - share_ocu_agriculture 1 355.52
## - share_ocu_military 1 355.66
## - share_urbrur_urban 1 356.23
## <none> 356.51
## - share_v04_revoq_Missing 1 356.72
## - share_ocu_serviceSales 1 356.86
## - share_sex_male 1 357.01
## - share_kitchen_yes 1 357.02
## - share_ocu_adminSupport 1 357.62
## - share_ocu_technician 1 357.89
## + share_ocu_manager 1 358.49
## - share_car_yes 1 360.82
## - mean_p26_edad 1 362.60
## - share_health_insurance_yes 1 363.05
## - share_ocu_professional 1 364.83
## - share_read_knowing 1 374.67
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
##
## Step: AIC = 354.55
## Mean ~ mean_p26_edad + share_ocu_military + share_ocu_professional +
## share_ocu_technician + share_ocu_adminSupport + share_ocu_serviceSales +
## share_ocu_agriculture + share_ocu_unskilled + share_sex_male +
## share_read_knowing + share_urbrur_urban + share_health_insurance_yes +
## share_v04_revoq_Missing + share_car_yes + share_water_heater_yes +
## share_kitchen_yes
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_professional +
## share_ocu_technician + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## df AIC
## - share_ocu_unskilled 1 352.67
## - share_water_heater_yes 1 353.22
## - share_ocu_agriculture 1 353.53
## - share_ocu_military 1 353.69
## - share_urbrur_urban 1 354.24
## <none> 354.55
## - share_v04_revoq_Missing 1 354.75
## - share_ocu_serviceSales 1 354.86
## - share_kitchen_yes 1 355.12
## - share_ocu_adminSupport 1 355.62
## - share_sex_male 1 355.74
## - share_ocu_technician 1 355.89
## + share_interior_plastered_yes 1 356.51
## + share_ocu_manager 1 356.53
## - share_car_yes 1 359.79
## - share_health_insurance_yes 1 361.07
## - share_ocu_professional 1 363.27
## - mean_p26_edad 1 363.88
## - share_read_knowing 1 373.68
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
##
## Step: AIC = 352.67
## Mean ~ mean_p26_edad + share_ocu_military + share_ocu_professional +
## share_ocu_technician + share_ocu_adminSupport + share_ocu_serviceSales +
## share_ocu_agriculture + share_sex_male + share_read_knowing +
## share_urbrur_urban + share_health_insurance_yes + share_v04_revoq_Missing +
## share_car_yes + share_water_heater_yes + share_kitchen_yes
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_professional +
## share_ocu_technician + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## df AIC
## - share_water_heater_yes 1 351.36
## - share_ocu_agriculture 1 351.53
## - share_ocu_military 1 351.94
## - share_urbrur_urban 1 352.36
## <none> 352.67
## - share_v04_revoq_Missing 1 352.90
## - share_ocu_serviceSales 1 352.90
## - share_kitchen_yes 1 353.49
## - share_ocu_adminSupport 1 353.62
## - share_sex_male 1 353.89
## - share_ocu_technician 1 353.89
## + share_ocu_unskilled 1 354.55
## + share_interior_plastered_yes 1 354.65
## + share_ocu_manager 1 354.66
## - share_car_yes 1 358.71
## - share_health_insurance_yes 1 359.31
## - share_ocu_professional 1 361.33
## - mean_p26_edad 1 361.89
## - share_read_knowing 1 372.10
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
##
## Step: AIC = 351.36
## Mean ~ mean_p26_edad + share_ocu_military + share_ocu_professional +
## share_ocu_technician + share_ocu_adminSupport + share_ocu_serviceSales +
## share_ocu_agriculture + share_sex_male + share_read_knowing +
## share_urbrur_urban + share_health_insurance_yes + share_v04_revoq_Missing +
## share_car_yes + share_kitchen_yes
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_professional +
## share_ocu_technician + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## df AIC
## - share_ocu_agriculture 1 350.16
## - share_ocu_military 1 350.32
## - share_v04_revoq_Missing 1 351.33
## <none> 351.36
## - share_urbrur_urban 1 351.40
## - share_kitchen_yes 1 351.77
## - share_ocu_serviceSales 1 352.20
## - share_ocu_adminSupport 1 352.52
## - share_sex_male 1 352.65
## + share_water_heater_yes 1 352.67
## - share_ocu_technician 1 353.10
## + share_ocu_unskilled 1 353.22
## + share_ocu_manager 1 353.32
## + share_interior_plastered_yes 1 353.32
## - share_car_yes 1 356.88
## - share_health_insurance_yes 1 358.16
## - mean_p26_edad 1 361.37
## - share_ocu_professional 1 362.84
## - share_read_knowing 1 370.12
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
##
## Step: AIC = 350.16
## Mean ~ mean_p26_edad + share_ocu_military + share_ocu_professional +
## share_ocu_technician + share_ocu_adminSupport + share_ocu_serviceSales +
## share_sex_male + share_read_knowing + share_urbrur_urban +
## share_health_insurance_yes + share_v04_revoq_Missing + share_car_yes +
## share_kitchen_yes
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_professional +
## share_ocu_technician + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_military +
## share_ocu_professional + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## df AIC
## - share_ocu_military 1 348.95
## <none> 350.16
## - share_ocu_serviceSales 1 350.21
## - share_v04_revoq_Missing 1 350.94
## - share_kitchen_yes 1 351.02
## - share_urbrur_urban 1 351.05
## - share_sex_male 1 351.18
## + share_ocu_agriculture 1 351.36
## + share_water_heater_yes 1 351.53
## - share_ocu_adminSupport 1 351.80
## - share_ocu_technician 1 351.99
## + share_ocu_manager 1 352.09
## + share_ocu_unskilled 1 352.16
## + share_interior_plastered_yes 1 352.16
## - share_car_yes 1 354.99
## - share_health_insurance_yes 1 356.48
## - mean_p26_edad 1 359.43
## - share_ocu_professional 1 360.88
## - share_read_knowing 1 368.23
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_professional +
## share_ocu_technician + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
##
## Step: AIC = 348.95
## Mean ~ mean_p26_edad + share_ocu_professional + share_ocu_technician +
## share_ocu_adminSupport + share_ocu_serviceSales + share_sex_male +
## share_read_knowing + share_urbrur_urban + share_health_insurance_yes +
## share_v04_revoq_Missing + share_car_yes + share_kitchen_yes
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_professional +
## share_ocu_technician + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_professional +
## share_ocu_technician + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_professional +
## share_ocu_technician + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_professional +
## share_ocu_technician + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_professional +
## share_ocu_technician + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_professional +
## share_ocu_technician + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_professional +
## share_ocu_technician + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_professional +
## share_ocu_technician + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_professional +
## share_ocu_technician + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_professional +
## share_ocu_technician + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_professional +
## share_ocu_technician + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## df AIC
## <none> 348.95
## - share_v04_revoq_Missing 1 349.01
## - share_ocu_serviceSales 1 349.54
## - share_kitchen_yes 1 349.55
## - share_urbrur_urban 1 350.10
## + share_ocu_military 1 350.16
## - share_sex_male 1 350.17
## + share_ocu_agriculture 1 350.32
## - share_ocu_technician 1 350.54
## + share_water_heater_yes 1 350.57
## + share_ocu_manager 1 350.76
## - share_ocu_adminSupport 1 350.79
## + share_ocu_unskilled 1 350.94
## + share_interior_plastered_yes 1 350.94
## - share_car_yes 1 355.34
## - share_health_insurance_yes 1 355.71
## - mean_p26_edad 1 358.17
## - share_ocu_professional 1 361.02
## - share_read_knowing 1 370.07
## Warning in fh(fixed = Mean ~ mean_p26_edad + share_ocu_professional +
## share_ocu_technician + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
##
## Call:
## fh(fixed = Mean ~ mean_p26_edad + share_ocu_professional + share_ocu_technician +
## share_ocu_adminSupport + share_ocu_serviceSales + share_sex_male +
## share_read_knowing + share_urbrur_urban + share_health_insurance_yes +
## share_v04_revoq_Missing + share_car_yes + share_kitchen_yes,
## vardir = "Var_Mean", combined_data = combined_data, domains = "ID_prov",
## method = "ml", MSE = TRUE)
##
## Coefficients:
## coefficients std.error t.value p.value
## (Intercept) 9.453181 5.703735 1.6574 0.0974454 .
## mean_p26_edad -0.199955 0.058906 -3.3945 0.0006876 ***
## share_ocu_professional 29.466319 7.653160 3.8502 0.0001180 ***
## share_ocu_technician 65.682717 34.608415 1.8979 0.0577115 .
## share_ocu_adminSupport -120.453130 61.268534 -1.9660 0.0493001 *
## share_ocu_serviceSales 9.465757 5.881384 1.6094 0.1075193
## share_sex_male -10.734112 5.985460 -1.7934 0.0729146 .
## share_read_knowing 14.674394 2.994312 4.9008 9.547e-07 ***
## share_urbrur_urban -1.665386 0.938380 -1.7747 0.0759400 .
## share_health_insurance_yes 6.771021 2.275802 2.9752 0.0029277 **
## share_v04_revoq_Missing -6.953541 4.843080 -1.4358 0.1510682
## share_car_yes -4.090169 1.411367 -2.8980 0.0037553 **
## share_kitchen_yes -2.494927 1.547109 -1.6126 0.1068231
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
fh_formula_optimized <- Mean ~ mean_p26_edad + share_ocu_professional + share_ocu_technician +
share_ocu_adminSupport + share_ocu_serviceSales + share_sex_male +
share_read_knowing + share_urbrur_urban + share_health_insurance_yes +
share_v04_revoq_Missing + share_car_yes + share_kitchen_yes
### which is the optimized one
fh_model_full <- fh(fixed = fh_formula_optimized,
vardir = "Var_Mean", combined_data = combined_data, domains = "ID_prov",
method = "reml", MSE = TRUE)
fh_model_full$MSE$FH %>% mean()## [1] 0.225684
## Warning: `qplot()` was deprecated in ggplot2 3.4.0.
## ℹ The deprecated feature was likely used in the emdi package.
## Please report the issue to the authors.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Press [enter] to continue
## Warning in fortify(data, ...): Arguments in `...` must be used.
## ✖ Problematic arguments:
## • fill = color[2]
## • color = color[2]
## ℹ Did you misspell an argument name?
## Press [enter] to continue
## Warning in fortify(data, ...): Arguments in `...` must be used.
## ✖ Problematic arguments:
## • fill = color[2]
## • color = color[2]
## ℹ Did you misspell an argument name?
## Press [enter] to continue
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the emdi package.
## Please report the issue to the authors.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Press [enter] to continue
## Press [enter] to continue
## Press [enter] to continue
## Press [enter] to continue
## $qq_plots
## TableGrob (1 x 2) "arrange": 2 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
##
## $density_res
##
## $density_ran
##
## $cooks_distance
## [1] NA
##
## $likelihood
## [1] NA
## $scatter_FH
##
## $line_FH
##
## $scatter_FH
## NULL
##
## $line_FH
## NULL
##
## $scatter_FH
## NULL
##
## $line_FH
## NULL
##
## $boxplot_MSE_FH
##
## $ordered_MSE_FH
##
## $boxplot_CV_FH
##
## $ordered_CV_FH
fh_model_full_log <- fh(
fixed = fh_formula,
vardir = "Var_Mean",
combined_data = combined_data,
domains = "ID_prov",
method = "reml",transformation = "log",backtransformation = "bc_crude",
MSE = TRUE
)
plot(fh_model_full_log)## Press [enter] to continue
## Warning in fortify(data, ...): Arguments in `...` must be used.
## ✖ Problematic arguments:
## • fill = color[2]
## • color = color[2]
## ℹ Did you misspell an argument name?
## Press [enter] to continue
## Warning in fortify(data, ...): Arguments in `...` must be used.
## ✖ Problematic arguments:
## • fill = color[2]
## • color = color[2]
## ℹ Did you misspell an argument name?
## [1] 0.2094632
## [1] 7684281
## [1] 7350282
## [1] 7350282
dat_BHF_census$ID_prov <- dat_BHF_census$ID_prov %>% as.factor()
# sapply(dat_BHF_census,FUN = function(x) sum(is.na(x)))
nrow(sample)## [1] 2260
## [1] 2260
## [1] 2260
dat_BHF_sample$ID_prov <- dat_BHF_sample$ID_prov %>% as.factor()
sapply(dat_BHF_sample,FUN = function(x) sum(is.na(x)))## ID_prov aestudio
## 0 0
## p26_edad ocu_military
## 0 0
## ocu_manager ocu_professional
## 0 0
## ocu_technician ocu_adminSupport
## 0 0
## ocu_serviceSales ocu_agriculture
## 0 0
## ocu_construction ocu_operators
## 0 0
## ocu_unskilled ocu_NaN
## 0 0
## ocu_1d_13_Missing sex_female
## 0 0
## sex_male read_knowing
## 0 0
## read_not_knowing read_not_specified
## 0 0
## urbrur_urban urbrur_rural
## 0 0
## health_insurance_yes health_insurance_no
## 0 0
## health_insurance_not_specified p30b_caja_Missing
## 0 0
## interior_plastered_yes interior_plastered_no
## 0 0
## v04_revoq_Missing car_yes
## 0 0
## car_no car_not_specified
## 0 0
## v18c_auto_Missing water_heater_yes
## 0 0
## water_heater_no water_heater_not_specified
## 0 0
## v18h_calefon_Missing kitchen_yes
## 0 0
## kitchen_no v12_cocina_Missing
## 0 0
## n
## 0
BHF_model_full_unoptimized <- NER_Trafo(fixed = bhf_formula
,smp_domains = "ID_prov"
, smp_data = dat_BHF_sample
, pop_data = dat_BHF_census
, pop_area_size = vec_pop_size
, pop_domains = "ID_prov"
, transformation = "no"
, seed = 2022
, MSE = T
)## [1] "More SAE methods for full population data and transformations are offered in the R package emdi."
## [1] 0.2790526
## Loading required package: lme4
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:emdi':
##
## step
## The following object is masked from 'package:stats':
##
## step
### model selection
full_model <- lm(bhf_formula,data = sample)
bhf_formula_intercept <- aestudio ~ p26_edad + ocu_military + ocu_manager + ocu_professional +
ocu_technician + ocu_adminSupport + ocu_serviceSales + ocu_agriculture +
ocu_unskilled + sex_male + read_knowing + urbrur_urban +
health_insurance_yes + interior_plastered_yes + v04_revoq_Missing +
car_yes + water_heater_yes + kitchen_yes + (1|ID_prov)
lmer_model <- lmer(bhf_formula_intercept,data = sample)
# lmer_model %>% summary()
step(lmer_model)## Backward reduced random-effect table:
##
## Eliminated npar logLik AIC LRT Df Pr(>Chisq)
## <none> 21 -6068.9 12180
## (1 | ID_prov) 0 20 -6079.0 12198 20.159 1 7.125e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Backward reduced fixed-effect table:
## Degrees of freedom method: Satterthwaite
##
## Eliminated Sum Sq Mean Sq NumDF DenDF F value
## v04_revoq_Missing 1 33.3 33.3 1 2240.7 2.7201
## p26_edad 0 5541.1 5541.1 1 2241.3 451.6769
## ocu_military 0 83.8 83.8 1 2222.2 6.8312
## ocu_manager 0 104.2 104.2 1 2239.5 8.4936
## ocu_professional 0 3637.3 3637.3 1 2231.0 296.4944
## ocu_technician 0 521.3 521.3 1 2217.6 42.4959
## ocu_adminSupport 0 178.7 178.7 1 2224.9 14.5638
## ocu_serviceSales 0 59.7 59.7 1 2231.4 4.8659
## ocu_agriculture 0 514.7 514.7 1 2236.7 41.9543
## ocu_unskilled 0 77.3 77.3 1 2221.1 6.3046
## sex_male 0 193.5 193.5 1 2218.0 15.7743
## read_knowing 0 4182.1 4182.1 1 2235.8 340.9007
## urbrur_urban 0 127.0 127.0 1 1222.8 10.3512
## health_insurance_yes 0 315.4 315.4 1 2242.0 25.7082
## interior_plastered_yes 0 311.8 311.8 1 2107.4 25.4185
## car_yes 0 97.7 97.7 1 2239.7 7.9621
## water_heater_yes 0 109.8 109.8 1 2237.5 8.9509
## kitchen_yes 0 114.0 114.0 1 2241.8 9.2940
## Pr(>F)
## v04_revoq_Missing 0.0992343 .
## p26_edad < 2.2e-16 ***
## ocu_military 0.0090184 **
## ocu_manager 0.0035994 **
## ocu_professional < 2.2e-16 ***
## ocu_technician 8.742e-11 ***
## ocu_adminSupport 0.0001392 ***
## ocu_serviceSales 0.0274934 *
## ocu_agriculture 1.145e-10 ***
## ocu_unskilled 0.0121132 *
## sex_male 7.364e-05 ***
## read_knowing < 2.2e-16 ***
## urbrur_urban 0.0013279 **
## health_insurance_yes 4.297e-07 ***
## interior_plastered_yes 5.009e-07 ***
## car_yes 0.0048189 **
## water_heater_yes 0.0028037 **
## kitchen_yes 0.0023259 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Model found:
## aestudio ~ p26_edad + ocu_military + ocu_manager + ocu_professional + ocu_technician + ocu_adminSupport + ocu_serviceSales + ocu_agriculture + ocu_unskilled + sex_male + read_knowing + urbrur_urban + health_insurance_yes + interior_plastered_yes + car_yes + water_heater_yes + kitchen_yes + (1 | ID_prov)
Suggested model by step funktion bhf_formula_optimized <- aestudio ~ p26_edad + ocu_military + ocu_manager + ocu_professional + ocu_technician + ocu_adminSupport + ocu_serviceSales + ocu_agriculture + ocu_unskilled + sex_male + read_knowing + urbrur_urban + health_insurance_yes + interior_plastered_yes + car_yes + water_heater_yes + kitchen_yes + (1 | ID_prov)
bhf_formula_optimized <- aestudio ~ p26_edad + ocu_military + ocu_manager + ocu_professional + ocu_technician + ocu_adminSupport + ocu_serviceSales + ocu_agriculture + ocu_unskilled + sex_male + read_knowing + urbrur_urban + health_insurance_yes + interior_plastered_yes + car_yes + water_heater_yes + kitchen_yes
BHF_model_full <- NER_Trafo(fixed = bhf_formula_optimized
,smp_domains = "ID_prov"
, smp_data = dat_BHF_sample
, pop_data = dat_BHF_census
, pop_area_size = vec_pop_size
, pop_domains = "ID_prov"
, transformation = "no"
, seed = 2022
, MSE = T
)## [1] "More SAE methods for full population data and transformations are offered in the R package emdi."
## [1] 0.2792847
## Press [enter] to continue
## Warning in fortify(data, ...): Arguments in `...` must be used.
## ✖ Problematic arguments:
## • fill = color[2]
## • color = color[2]
## ℹ Did you misspell an argument name?
## Press [enter] to continue
## Warning in fortify(data, ...): Arguments in `...` must be used.
## ✖ Problematic arguments:
## • fill = color[2]
## • color = color[2]
## ℹ Did you misspell an argument name?
## Press [enter] to continue
## Press [enter] to continue
## Press [enter] to continue
## Press [enter] to continue
## Press [enter] to continue
## Press [enter] to continue
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:lubridate':
##
## stamp
p1 <- p_dig_FH$density_res
p2 <- p_dig_FH$density_res +
# scale_color_manual(values = c("red")) +
# scale_fill_manual(values = c("red")) +
geom_density(color = "#FF8552",fill = "#FF8552",alpha = .15, linewidth = .4) +
# geom_hline(yintercept = 0, linetype = "dashed", color = "grey40") +
theme_minimal() +
labs(
title = "FH - Residual Assumption",
# subtitle = "Fay–Herriot model",
x = "standardized Residuals",
y = "Density",
# caption = "Source: household survey + census"
)
p3 <- p_dig_FH$density_ran
p4 <- p_dig_FH$density_ran +
# scale_color_manual(values = c("red")) +
# scale_fill_manual(values = c("red")) +
geom_density(color = "#FF8552",fill = "#FF8552",alpha = .15, linewidth = .4) +
# geom_hline(yintercept = 0, linetype = "dashed", color = "grey40") +
theme_minimal() +
labs(
title = "FH - Randome Effects Assumption",
# subtitle = "Fay–Herriot model",
x = "standardized randome effects",
y = "Density",
# caption = "Source: household survey + census"
)
plot_grid(p1,p2,p3,p4,nrow = 2,ncol = 2)p5 <- p_dig_FH_comp$scatter_FH
p6 <- p_dig_FH_comp$scatter_FH +
geom_smooth(aes(color = "Reg. line"), method = "lm", se = FALSE) +
scale_color_manual(values = c("Reg. line" = "#9A031E", "Intersection" = "#FF8552")) +
theme_minimal() +
labs(
title = "direct vs fh estimates",
x = "direct",
y = "fh-model",
color = ""
)## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
p7 <- p_dig_FH_comp$line_FH
p8 <- p_dig_FH_comp$line_FH +
# Override layer colors to be mapped instead of fixed
# geom_line(aes(color = c("Direct", "Model-based"))) +
# geom_line(size = .8,alpha = .3) +
# geom_point(size = 1, alpha = .5) +
scale_color_manual(values = c("#9A031E","#FF8552")) +
theme_minimal() +
labs(
title = "Direct vs FH estimates ordered by Domain size",
x = "Domain - ordered by decreasing MSE of Direct",
y = "mean years of schooling estimate",
color = "Method"
)## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
p9 <- p_dig_FH_comp$ordered_MSE_FH
p10 <- p_dig_FH_comp$ordered_MSE_FH +
scale_color_manual(values = c("#9A031E","#FF8552")) +
theme_minimal() +
labs(
title = "MSE: direct vs FH",
# subtitle = "Fay–Herriot model",
x = "Domain - ordered by increasing MSE of Direct",
y = "MSE",
# caption = "Source: household survey + census"
)## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## `geom_smooth()` using formula = 'y ~ x'
ggsave("../output/diagnostics_fh.svg",
plot = plot_grid_fh,
width = 6, # width in inches
height = 16, # height in inches; make it long enough for 5 plots
units = "in", # units for width/height
device = "svg")
ggsave("../output/diagnostics_fh.png",
plot = plot_grid_fh,
width = 6, # width in inches
height = 16, # height in inches; make it long enough for 5 plots
units = "in", # units for width/height
dpi = 300)p1 <- p_dig_BHF$density_res
p2 <- p_dig_BHF$density_res +
# scale_color_manual(values = c("red")) +
# scale_fill_manual(values = c("red")) +
geom_density(color = "#7ca982",fill = "#7ca982",alpha = .15, linewidth = .4) +
# geom_hline(yintercept = 0, linetype = "dashed", color = "grey40") +
theme_minimal() +
labs(
title = "BHF - Residual Assumption",
# subtitle = "Fay–Herriot model",
x = "standardized Residuals",
y = "Density",
# caption = "Source: household survey + census"
)
p3 <- p_dig_BHF$density_ran
p4 <- p_dig_FH$density_ran +
# scale_color_manual(values = c("red")) +
# scale_fill_manual(values = c("red")) +
geom_density(color = "#7ca982",fill = "#7ca982",alpha = .15, linewidth = .4) +
# geom_hline(yintercept = 0, linetype = "dashed", color = "grey40") +
theme_minimal() +
labs(
title = "BHF - Randome Effects Adsumption",
# subtitle = "Fay–Herriot model",
x = "standardized randome effects",
y = "Desnity",
# caption = "Source: household survey + census"
)
plot_grid(p1,p2,p3,p4,nrow = 2,ncol = 2)p5 <- p_dig_BHF_comp$scatter_Mean
p6 <- p_dig_BHF_comp$scatter_Mean +
geom_smooth(aes(color = "Reg. line"), method = "lm", se = FALSE) +
scale_color_manual(values = c("Reg. line" = "#4e6b57", "Intersection" = "#b5c5b4")) +
theme_minimal() +
labs(
title = "direct vs BHF estimates",
x = "direct",
y = "BHF-Mode",
color = ""
)## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
p7 <- p_dig_BHF_comp$line_Mean
p8 <- p_dig_BHF_comp$line_Mean +
# Override layer colors to be mapped instead of fixed
# geom_line(aes(color = c("Direct", "Model-based"))) +
# geom_line(aes(size = .8,alpha = .3)) +
# geom_point(aes(size = 1, alpha = .2)) +
scale_color_manual(values = c("#4e6b57","#b5c5b4")) +
theme_minimal() +
labs(
title = "Direct vs BHF estimates ordered by Domain size",
x = "Domain - ordered by decreasing MSE of direct estimation",
y = "mean years of schooling estimate",
color = "Method"
)## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
p9 <- p_dig_BHF_comp$ordered_MSE_Mean
p10 <- p_dig_BHF_comp$ordered_MSE_Mean +
scale_color_manual(values = c("#4e6b57","#b5c5b4")) +
theme_minimal() +
labs(
title = "MSE: direct vs BHF",
# subtitle = "Fay–Herriot model",
x = "Domain - ordered by increasing MSE of direct estimation",
y = "MSE",
# caption = "Source: household survey + census"
)## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## `geom_smooth()` using formula = 'y ~ x'
ggsave("../output/diagnostics_bhf.svg",
plot = plot_grid_bhf,
width = 6, # width in inches
height = 16, # height in inches; make it long enough for 5 plots
units = "in", # units for width/height
device = "svg")
ggsave("../output/disgnostics_bhf.png",
plot = plot_grid_bhf,
width = 6, # width in inches
height = 16, # height in inches; make it long enough for 5 plots
units = "in", # units for width/height
dpi = 300)